Application 5: Time Trends

Author

Erik Westlund

Published

June 12, 2025

# Set seed for reproducibility
set.seed(123)

# Generate time series data
n_weeks <- 104  # 2 years of weekly data
intervention_week <- 52  # Intervention at 1 year

# Create base data
time_data <- tibble(
  week = 1:n_weeks,
  date = as.Date("2023-01-01") + weeks(week - 1),
  # Base level (flat)
  base_level = 100,
  # Random noise
  noise = rnorm(n_weeks, mean = 0, sd = 10),
  # Intervention effect with decay
  intervention = ifelse(
    week >= intervention_week,
    30 * exp(-0.02 * (week - intervention_week)),  # Exponential decay
    0
  ),
  # Combine components
  value = base_level + noise + intervention
)

# Add gender effect for later use
time_data <- time_data |>
  crossing(gender = c("Male", "Female")) |>
  mutate(
    # Add gender-specific effects
    gender_effect = ifelse(gender == "Female", -15, 15),
    # Add gender-specific intervention effects with decay
    gender_intervention = ifelse(
      week >= intervention_week,
      ifelse(
        gender == "Female",
        40 * exp(-0.02 * (week - intervention_week)),  # Larger effect for females
        20 * exp(-0.02 * (week - intervention_week))
      ),
      0
    ),
    # Combine all effects
    value_gender = value + gender_effect + gender_intervention
  )
# Create basic time series plot
p_time <- ggplot(time_data, aes(x = date, y = value)) +
  # Add points
  geom_point(
    size = 2,
    alpha = 0.5,
    color = "gray50"
  ) +
  # Add line
  geom_line(
    color = "gray30",
    linewidth = 0.5
  ) +
  # Add vertical line for intervention
  geom_vline(
    xintercept = as.Date("2024-01-01"),
    linetype = "dashed",
    color = "red",
    alpha = 0.5
  ) +
  # Add labels
  labs(
    title = "Weekly Physical Activity Over Time",
    subtitle = "Intervention implemented at 1 year (dashed line)",
    x = "Date",
    y = "Minutes of Activity"
  ) +
  # Customize theme
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 10, hjust = 0.5, margin = margin(b = 20)),
    plot.title.position = "plot",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 8),
    panel.grid.minor = element_blank()
  )

p_time

# Create time series plot with smoother
p_time_smooth <- ggplot(time_data, aes(x = date, y = value)) +
  # Add points
  geom_point(
    size = 2,
    alpha = 0.5,
    color = "gray50"
  ) +
  # Add smoother
  geom_smooth(
    method = "loess",
    span = 0.3,
    color = "blue",
    linewidth = 1,
    se = TRUE,
    alpha = 0.2
  ) +
  # Add vertical line for intervention
  geom_vline(
    xintercept = as.Date("2024-01-01"),
    linetype = "dashed",
    color = "red",
    alpha = 0.5
  ) +
  # Add labels
  labs(
    title = "Weekly Physical Activity Over Time with Smoother",
    subtitle = "Intervention implemented at 1 year (dashed line)",
    x = "Date",
    y = "Minutes of Activity"
  ) +
  # Customize theme
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 10, hjust = 0.5, margin = margin(b = 20)),
    plot.title.position = "plot",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 8),
    panel.grid.minor = element_blank()
  )

p_time_smooth

# Calculate averages for annotation
avg_before <- time_data |>
  filter(date < as.Date("2024-01-01")) |>
  summarise(avg = mean(value)) |>
  mutate(
    x = as.Date("2023-07-01"),
    label = sprintf("Avg: %.0f min", avg)
  )

avg_after <- time_data |>
  filter(date >= as.Date("2024-01-01")) |>
  summarise(avg = mean(value)) |>
  mutate(
    x = as.Date("2024-07-01"),
    label = sprintf("Avg: %.0f min", avg)
  )

# Create time series plot with separate linear models
p_time_lm <- ggplot(time_data, aes(x = date, y = value)) +
  # Add points
  geom_point(
    size = 2,
    alpha = 0.5,
    color = "gray50"
  ) +
  # Add linear model for pre-intervention
  geom_smooth(
    data = filter(time_data, date < as.Date("2024-01-01")),
    method = "lm",
    color = "blue",
    linewidth = 1,
    se = TRUE,
    alpha = 0.2
  ) +
  # Add linear model for post-intervention
  geom_smooth(
    data = filter(time_data, date >= as.Date("2024-01-01")),
    method = "lm",
    color = "red",
    linewidth = 1,
    se = TRUE,
    alpha = 0.2
  ) +
  # Add vertical line for intervention
  geom_vline(
    xintercept = as.Date("2024-01-01"),
    linetype = "dashed",
    color = "black",
    alpha = 0.5
  ) +
  # Add arrow and annotation
  annotate(
    "segment",
    x = as.Date("2023-11-15"),
    xend = as.Date("2024-01-01"),
    y = 180,
    yend = 180,
    arrow = arrow(length = unit(0.2, "cm"), ends = "last", type = "closed"),
    color = "darkred"
  ) +
  annotate(
    "text",
    x = as.Date("2023-11-15"),
    y = 180,
    label = "Intervention\nStarted",
    hjust = 1.1,
    color = "darkred",
    fontface = "bold"
  ) +
  # Add average annotations
  geom_text(
    data = avg_before,
    aes(x = x, y = avg, label = label),
    hjust = 0.5,
    vjust = -6,
    fontface = "bold",
    color = "black"
  ) +
  geom_text(
    data = avg_after,
    aes(x = x, y = avg, label = label),
    hjust = 0.5,
    vjust = -6,
    fontface = "bold",
    color = "black"
  ) +
  # Add labels
  labs(
    title = "Weekly Physical Activity Over Time",
    subtitle = "Linear trends before and after intervention",
    x = "Date",
    y = "Minutes of Activity"
  ) +
  # Customize theme
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.subtitle = element_text(size = 10, hjust = 0.5, margin = margin(b = 20)),
    plot.title.position = "plot",
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 8),
    panel.grid.minor = element_blank()
  )

p_time_lm

# Set seed for reproducibility
set.seed(123)

# Generate dates for 4 years
dates <- seq.Date(
  from = as.Date("2020-01-01"),
  to = as.Date("2023-12-31"),
  by = "day"
)

# Generate person IDs and their base characteristics
n_people <- 300
person_data <- tibble(
  person_id = 1:n_people,
  gender = sample(c("Male", "Female"), n_people, replace = TRUE),
  # Base weight varies by person (in pounds)
  base_weight = case_when(
    gender == "Male" ~ rnorm(n_people, mean = 176, sd = 15),  # Mean 176 lbs, SD 15
    TRUE ~ rnorm(n_people, mean = 143, sd = 12)               # Mean 143 lbs, SD 12
  ),
  # Individual seasonal sensitivity
  seasonal_sensitivity = rnorm(n_people, mean = 1, sd = 0.2)  # Some people more sensitive to seasons
)

# Create base data frame with all person-dates
weight_data <- crossing(
  date = dates,
  person_id = 1:n_people
) |>
  left_join(person_data, by = "person_id") |>
  mutate(
    month = month(date),
    day_of_year = yday(date),
    # Small seasonal effect (2-3 pounds total)
    seasonal_effect = case_when(
      month %in% c(12, 1, 2) ~ 2.5 * sin((day_of_year - 15) * 2 * pi / 365),  # Winter peak
      month %in% c(3, 4, 5) ~ 1.5 * sin((day_of_year - 15) * 2 * pi / 365),   # Spring
      month %in% c(6, 7, 8) ~ 0,                                              # Summer
      TRUE ~ 2.0 * sin((day_of_year - 15) * 2 * pi / 365)                     # Fall
    ),
    # Apply individual sensitivity to seasonal effect
    seasonal_effect = seasonal_effect * seasonal_sensitivity,
    # Add very small random noise (0.2 lbs)
    noise = rnorm(n(), 0, 0.2),
    # Calculate final weight
    weight = base_weight + seasonal_effect + noise
  )

# Calculate daily averages by gender
daily_averages <- weight_data |>
  group_by(date, gender) |>
  summarise(
    avg_weight = mean(weight),
    .groups = "drop"
  )

# Create annotation data frame
annotation_dates <- c(
  as.Date("2020-11-01"), as.Date("2020-12-31"),
  as.Date("2021-11-01"), as.Date("2021-12-31"),
  as.Date("2022-11-01"), as.Date("2022-12-31"),
  as.Date("2023-11-01"), as.Date("2023-12-31")
)

annotations <- daily_averages |>
  filter(date %in% annotation_dates) |>
  mutate(
    label = sprintf("%.1f", avg_weight),
    text_x = case_when(
      month(date) == 11 ~ date - 56,  # Dodge left for November
      TRUE ~ date + 56                # Dodge right for December
    ),
    text_y = case_when(
      gender == "Male" ~ 185,  # Fixed position for men
      TRUE ~ 155               # Fixed position for women
    )
  )

# Create time series plot
p_weight <- ggplot(daily_averages, aes(x = date, y = avg_weight, color = gender)) +
  # Add holiday season shading
  annotate(
    "rect",
    xmin = as.Date("2020-11-01"),
    xmax = as.Date("2020-12-31"),
    ymin = -Inf,
    ymax = Inf,
    fill = "gray90",
    alpha = 0.5
  ) +
  annotate(
    "rect",
    xmin = as.Date("2021-11-01"),
    xmax = as.Date("2021-12-31"),
    ymin = -Inf,
    ymax = Inf,
    fill = "gray90",
    alpha = 0.5
  ) +
  annotate(
    "rect",
    xmin = as.Date("2022-11-01"),
    xmax = as.Date("2022-12-31"),
    ymin = -Inf,
    ymax = Inf,
    fill = "gray90",
    alpha = 0.5
  ) +
  annotate(
    "rect",
    xmin = as.Date("2023-11-01"),
    xmax = as.Date("2023-12-31"),
    ymin = -Inf,
    ymax = Inf,
    fill = "gray90",
    alpha = 0.5
  ) +
  # Add vertical lines for Nov 1 and Dec 31
  geom_vline(
    data = annotations,
    aes(xintercept = date),
    linewidth = 0.3,
    alpha = 0.5,
    show.legend = FALSE
  ) +
  # Add smoothed lines
  geom_smooth(
    method = "loess",
    span = 0.1,
    linewidth = 1,
    se = TRUE,
    alpha = 0.2
  ) +
  # Add text labels
  geom_text(
    data = annotations,
    aes(
      x = text_x,
      y = text_y,
      label = label,
      color = gender
    ),
    size = 3,
    show.legend = FALSE
  ) +
  # Add labels
  labs(
    title = "Average Weight Over Time by Sex",
    x = "Date",
    y = "Weight (lbs)",
    caption = "Shaded area indicates holiday season (November 1 - December 31).\nText annotations show average weights on November 1 and December 31."
  ) +
  # Customize theme
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.title.position = "plot",
    plot.caption = element_text(size = 8, hjust = 0, margin = margin(t = 10)),
    axis.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(size = 8),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_blank()
  ) +
  # Extend y-axis range
  scale_y_continuous(
    limits = c(120, 220),
    breaks = seq(120, 220, by = 20)
  ) +
  # Use different colors for genders
  scale_color_manual(
    values = c(
      "Male" = "darkblue",
      "Female" = "darkred"
    ),
    labels = c("Men", "Women")
  ) +
  # Remove rect from legend and fix the "a" issue
  guides(color = guide_legend(override.aes = list(shape = NA, fill = NA, linetype = 1, label = NULL, text = NULL)))

p_weight

# Create treatment pattern data
set.seed(123)

# Define possible treatments (common hypertension medications)
treatments <- c("ACE Inhibitor", "ARB", "CCB", "No Treatment")

# Generate patient data with treatment changes
n_patients <- 1000
patient_data <- tibble(
  patient_id = 1:n_patients,
  initial_treatment = sample(treatments, n_patients, replace = TRUE, 
                           prob = c(0.4, 0.3, 0.2, 0.1)),  # ACE inhibitors most common first-line
  # 60% of patients change treatment at least once
  has_change = rbinom(n_patients, 1, 0.6),
  # For those who change, determine second treatment
  second_treatment = case_when(
    has_change == 1 ~ sample(treatments, n_patients, replace = TRUE, 
                           prob = c(0.3, 0.3, 0.3, 0.1)),
    TRUE ~ initial_treatment
  ),
  # 30% of patients who changed once change again
  has_second_change = case_when(
    has_change == 1 ~ rbinom(n_patients, 1, 0.3),
    TRUE ~ 0
  ),
  # For those who change twice, determine third treatment
  third_treatment = case_when(
    has_second_change == 1 ~ sample(treatments, n_patients, replace = TRUE, 
                                  prob = c(0.2, 0.2, 0.2, 0.4)),
    TRUE ~ second_treatment
  )
)

# Create Sankey diagram data
# Create nodes data frame
nodes <- data.frame(
  name = c(
    treatments,  # First column
    treatments,  # Second column
    treatments   # Third column
  )
)

# Create links data frame
links <- patient_data |>
  count(initial_treatment, second_treatment, third_treatment) |>
  filter(n > 0) |>
  mutate(
    source = match(initial_treatment, nodes$name) - 1,
    target = match(second_treatment, nodes$name) - 1 + length(treatments),
    value = n
  ) |>
  select(source, target, value)

# Add second stage links
links_second <- patient_data |>
  count(second_treatment, third_treatment) |>
  filter(n > 0) |>
  mutate(
    source = match(second_treatment, nodes$name) - 1 + length(treatments),
    target = match(third_treatment, nodes$name) - 1 + 2 * length(treatments),
    value = n
  ) |>
  select(source, target, value)

links <- bind_rows(links, links_second)

# Create Sankey diagram
p_sankey <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 30,
  sinksRight = FALSE
)

# Add title using HTML
p_sankey <- htmlwidgets::prependContent(
  p_sankey,
  htmltools::tags$h3("Hypertension Treatment Pattern Flow Over Time")
)

# Create Sunburst plot data
# Create properly formatted sunburst data with three levels
sunburst_data <- patient_data |>
  # Create paths for each treatment stage
  mutate(
    ids = paste0("initial_", initial_treatment),
    labels = initial_treatment,
    parents = "",
    values = 1,
    level = "initial"
  ) |>
  bind_rows(
    patient_data |>
      filter(has_change == 1) |>
      mutate(
        ids = paste0("second_", initial_treatment, "_", second_treatment),
        labels = second_treatment,
        parents = paste0("initial_", initial_treatment),
        values = 1,
        level = "second"
      )
  ) |>
  bind_rows(
    patient_data |>
      filter(has_second_change == 1) |>
      mutate(
        ids = paste0("third_", initial_treatment, "_", second_treatment, "_", third_treatment),
        labels = third_treatment,
        parents = paste0("second_", initial_treatment, "_", second_treatment),
        values = 1,
        level = "third"
      )
  ) |>
  group_by(ids, labels, parents, level) |>
  summarise(values = sum(values), .groups = "drop")

# Create color scales for each level
initial_colors <- viridis::viridis(4)
second_colors <- viridis::viridis(4, begin = 0.3, end = 0.7)
third_colors <- viridis::viridis(4, begin = 0.6, end = 1)

# Create Sunburst plot using plotly
p_sunburst <- plot_ly(
  sunburst_data,
  ids = ~ids,
  labels = ~labels,
  parents = ~parents,
  values = ~values,
  type = "sunburst",
  branchvalues = "total",
  marker = list(
    colors = case_when(
      sunburst_data$level == "initial" ~ initial_colors[match(sunburst_data$labels, treatments)],
      sunburst_data$level == "second" ~ second_colors[match(sunburst_data$labels, treatments)],
      sunburst_data$level == "third" ~ third_colors[match(sunburst_data$labels, treatments)]
    )
  )
) |>
  layout(
    title = "Hypertension Treatment Pattern Distribution",
    width = 800,
    height = 800
  )
Warning: Specifying width/height in layout() is now deprecated.
Please specify in ggplotly() or plot_ly()
# Display plots
p_sankey

Hypertension Treatment Pattern Flow Over Time

p_sunburst